import math,random
import quandl
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression,SGDRegressor,BayesianRidge,ARDRegression,PassiveAggressiveRegressor,TheilSenRegressor
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor,StackingRegressor,VotingRegressor
from sklearn.neural_network import MLPRegressor
import plotly.express as px
import warnings
# Suppress every warning for the rest of the run (applies process-wide).
warnings.filterwarnings('ignore')
# Ticker symbol used by the single-stock run below.
stock = 'MSFT'
# Number of trailing rows that PreprocessData holds out of training and
# predicts on; the label look-ahead is also derived from it
# (ceil(0.12 * daysToForecast) rows).
daysToForecast = 251
def getStockData(stock):
    """Download the Quandl ``WIKI`` dataset for a single ticker.

    The API key is read from the ``QUANDL_API_KEY`` environment variable
    when that variable is set and non-empty; otherwise the key that was
    originally embedded in this file is used so existing runs keep working.

    Args:
        stock: Ticker symbol; it is appended to the ``'WIKI/'`` prefix to
            form the dataset code.

    Returns:
        Whatever ``quandl.get`` returns for ``'WIKI/' + stock`` (the rest of
        this file indexes it by column name, so presumably a DataFrame).
    """
    import os  # local import: only this function needs it

    # NOTE(security): a credential committed to source control should be
    # rotated and supplied through the environment; the literal is kept
    # only as a backward-compatible fallback.
    quandl.ApiConfig.api_key = (
        os.environ.get("QUANDL_API_KEY") or "qWcicxSctVxrP9PhyneG"
    )
    return quandl.get('WIKI/' + stock)
def FormatDataForModel(dataArray):
    """Reduce raw price data to the feature columns used for modelling.

    Two derived columns are computed:

    * ``HL_PCT``     = (Adj. High  - Adj. Close) / Adj. Close * 100
    * ``PCT_change`` = (Adj. Close - Adj. Open)  / Adj. Open  * 100

    Args:
        dataArray: DataFrame containing the columns 'Adj. Open',
            'Adj. High', 'Adj. Low', 'Adj. Close' and 'Adj. Volume'.

    Returns:
        A new DataFrame with the columns 'Adj. Close', 'HL_PCT',
        'PCT_change' and 'Adj. Volume' (in that order), with missing
        values replaced by -99999. ``dataArray`` itself is not modified.

    Raises:
        KeyError: if any of the five required columns is missing.
    """
    # Explicit copy: the derived columns are written onto this frame, and
    # assigning onto a bare column selection is the pattern that triggers
    # pandas' SettingWithCopyWarning.
    features = dataArray[
        ['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']
    ].copy()
    features['HL_PCT'] = (
        (features['Adj. High'] - features['Adj. Close'])
        / features['Adj. Close'] * 100.0
    )
    features['PCT_change'] = (
        (features['Adj. Close'] - features['Adj. Open'])
        / features['Adj. Open'] * 100.0
    )
    features = features[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']]
    # -99999 is a sentinel for missing values so that downstream numeric
    # code never sees NaN.
    return features.fillna(-99999)
def PreprocessData(mlData, daysToForecast):
    """Build scaled features and labels, split into training and forecast parts.

    A 'label' column is added to ``mlData`` in place: the 'Adj. Close'
    value ``ceil(0.12 * daysToForecast)`` rows further down. Every column
    except 'label' is then standardised with ``preprocessing.scale`` (over
    all rows, including the held-out ones) and the last ``daysToForecast``
    rows are split off.

    Args:
        mlData: DataFrame with an 'Adj. Close' column; every column other
            than 'label' is used as a feature. Gains a 'label' column.
        daysToForecast: Number of trailing rows to hold out; must be >= 1.

    Returns:
        The list ``[X, y, X_data, forecastData]`` where

        * ``X`` is the scaled feature array without the last
          ``daysToForecast`` rows,
        * ``y`` is the array of 'label' values for those same rows,
        * ``X_data`` is the scaled feature array of the last
          ``daysToForecast`` rows,
        * ``forecastData`` is the last ``daysToForecast`` rows of
          ``mlData`` (including 'label').

    Raises:
        ValueError: if ``daysToForecast`` is less than 1.
    """
    if daysToForecast < 1:
        # With 0 the negative slices below flip meaning: X[:-0] is empty
        # and X[-0:] is everything, so training would silently get no rows.
        raise ValueError("daysToForecast must be a positive integer")

    forecast_col = 'Adj. Close'
    forecast_out = int(math.ceil(0.12 * daysToForecast))
    mlData['label'] = mlData[forecast_col].shift(-forecast_out)
    # The shift leaves NaN labels in the last `forecast_out` rows. Because
    # forecast_out <= daysToForecast, those rows all land in the held-out
    # part, so no rows need to be dropped here.

    # `columns=` instead of a positional axis: pandas 2.0 made every
    # DataFrame.drop argument except `labels` keyword-only.
    X = preprocessing.scale(np.array(mlData.drop(columns=['label'])))
    X_data = X[-daysToForecast:]
    X = X[:-daysToForecast]

    forecastData = mlData[-daysToForecast:]
    trainData = mlData[:-daysToForecast]
    y = np.array(trainData['label'])
    return [X, y, X_data, forecastData]
def TrainAndPredict(model, X, y, X_data):
    """Fit ``model`` on a random 80/20 split and predict the forecast rows.

    Args:
        model: Estimator exposing ``fit``, ``score`` and ``predict``.
        X: Feature rows available for training and testing.
        y: Labels matching ``X`` row for row.
        X_data: Feature rows to predict after fitting.

    Returns:
        Tuple ``(accuracy, prediction)``: ``accuracy`` is ``model.score``
        on the held-out 20 % (for scikit-learn regressors that is the R^2
        coefficient), and ``prediction`` is ``model.predict(X_data)``.
    """
    # No random_state is passed, so the split differs from call to call.
    fit_features, eval_features, fit_labels, eval_labels = train_test_split(
        X, y, test_size=0.2
    )
    model.fit(fit_features, fit_labels)
    return model.score(eval_features, eval_labels), model.predict(X_data)
def addPredictionToForecast(prediction, forecastData):
    """Pair the actual closing prices with the model's predictions.

    Args:
        prediction: Sequence of predicted values, one per row of
            ``forecastData``.
        forecastData: DataFrame with an 'Adj. Close' column.

    Returns:
        A new DataFrame on the same index with two columns: 'EOD' (the
        original 'Adj. Close' values) and 'prediction'.
    """
    return (
        forecastData[['Adj. Close']]
        .rename(columns={'Adj. Close': 'EOD'})
        .assign(prediction=prediction[:])
    )
def GraphPredictions(forecastData, stock):
    """Show a line chart of ``forecastData`` titled with ``stock``.

    Args:
        forecastData: Data handed straight to ``plotly.express.line``.
        stock: Text used as the chart title.
    """
    chart = px.line(forecastData).update_layout(
        title=stock,
        xaxis_title='Time',
        yaxis_title='Price',
    )
    chart.show()
def GraphAllData(allData, forecastData, stock):
    """Show the full closing-price history next to the predictions.

    Args:
        allData: DataFrame with an 'Adj. Close' column.
        forecastData: DataFrame with a 'prediction' column.
        stock: Text used as the chart title.
    """
    # Column-wise concat; rows are matched on the index labels of the two
    # series.
    series_to_plot = [allData['Adj. Close'], forecastData['prediction']]
    combined = pd.concat(series_to_plot, axis=1, sort=False)
    chart = px.line(combined).update_layout(
        title=stock,
        xaxis_title='Time',
        yaxis_title='Price',
    )
    chart.show()
# Single-stock run: download `stock`, build the features, fit a plain
# LinearRegression and plot the result.
allData = getStockData(stock)
mlData = FormatDataForModel(allData)
# X / y: training features and labels; X_data / forecastData: the held-out
# trailing `daysToForecast` rows (scaled features and original rows).
X,y,X_data,forecastData = PreprocessData(mlData,daysToForecast)
model = LinearRegression()
accuracy,prediction=TrainAndPredict(model,X,y,X_data)
# Replace forecastData with the two-column actual-vs-predicted frame.
forecastData = addPredictionToForecast(prediction,forecastData)
# `accuracy` is the value of model.score on the random test split.
print(accuracy)
GraphPredictions(forecastData,stock)
GraphAllData(allData,forecastData,stock)
# Tickers evaluated by the per-stock model comparison loop further down.
stock_list = ['AAPL', 'IBM', 'MSFT', 'WMT','AMZN','TSLA','HP']
# NOTE(review): the statements below repeat the single-stock pipeline that
# precedes this section for the same `stock` value (a second download and
# fit) -- looks like a leftover notebook cell; confirm whether both runs
# are needed.
print("Stock: ", stock)
allData = getStockData(stock)
mlData = FormatDataForModel(allData)
X,y,X_data,forecastData = PreprocessData(mlData,daysToForecast)
model = LinearRegression()
accuracy,prediction=TrainAndPredict(model,X,y,X_data)
# `accuracy` is the value of model.score on the random test split.
print("Accuracy: ", accuracy)
forecastData = addPredictionToForecast(prediction,forecastData)
GraphPredictions(forecastData,stock)
GraphAllData(allData,forecastData,stock)
# Estimators to compare, each paired with the label used in printed output
# and as a column / row name in the result tables. Every estimator is
# created with its default settings.
model_list = [
    [LinearRegression(), "LinearRegression"],
    [SVR(), "SupportVectorRegression"],
    [MLPRegressor(), "MLPRegressor"],
    [SGDRegressor(), "SGDRegressor"],
    [BayesianRidge(), "BayesianRidge"],
    [ARDRegression(), "ARDRegression"],
    [PassiveAggressiveRegressor(), "PassiveAggressiveRegressor"],
    [TheilSenRegressor(), "TheilSenRegressor"],
]
# Fit every model in model_list on every ticker in stock_list.
# model_results collects (model name, ticker, score) tuples;
# stock_dfs collects (ticker, frame) pairs, where each frame holds the
# actual closing prices plus one prediction column per model.
model_results = []
stock_dfs = []
for stock in stock_list:
    print("Stock: ", stock)
    allData = getStockData(stock)
    mlData = FormatDataForModel(allData)
    X, y, X_data, forecastData = PreprocessData(mlData, daysToForecast)

    # Start from the real closing prices of the forecast window.
    df_stocks = forecastData[['Adj. Close']].rename(
        columns={'Adj. Close': stock + ' Actual'}
    )

    # The same estimator objects are refitted for each ticker.
    for model, name in model_list:
        accuracy, prediction = TrainAndPredict(model, X, y, X_data)
        print("Model: ", name, " ", "Accuracy:", accuracy)
        model_results.append((name, stock, accuracy))
        df_stocks[name] = prediction[:]

    stock_dfs.append((stock, df_stocks))
# Score table: one row per model label, one column per ticker.
model_names = [label for _estimator, label in model_list]
df = pd.DataFrame(columns=stock_list, index=model_names)
for model_name, ticker, score in model_results:
    df.at[model_name, ticker] = score
# Bare expression: presumably shows the table when run as a notebook cell.
df

# For every ticker, the model with the highest score and that score.
# The cells were filled one by one into an empty frame, so the column is
# cast to float before idxmax.
highest = [
    [ticker, df[ticker].astype(float).idxmax(), df[ticker].max()]
    for ticker in df.columns
]
df_high = pd.DataFrame(highest, columns=["Stock","Model","Accuracy"])
df_high

# Mean score of each model across all tickers; the sorted view is only
# evaluated, not stored.
average = df.mean(axis=1)
average.sort_values(ascending=False)
# One chart per ticker: actual closing prices together with every model's
# predictions.
for stock, stock_df in stock_dfs:
    fig = px.line(stock_df).update_layout(
        title=stock,
        xaxis_title='Time',
        yaxis_title='Price',
    )
    fig.show()